Inspecting NGRT Over Past 4 Years

Initially when I began creating these graphs I partitioned the data into two groups, “below 11 years old reading age” and “not below 11 years old reading age”. Thus, the following graphics showcase the absolute values and then the proportions for each respective year.

Absolute Value

total_NGRT %>%
  group_by(below_11_years_old, Year) %>% 
  summarise(total = n()) %>% 
  ungroup() %>%
  ggplot(aes(x = Year, y = total, fill = fct_rev(below_11_years_old)))+
  geom_bar(stat = "identity", position = "dodge", color = "black")+
  theme_bw()+
  labs(fill = "Below 11 \nYears Old",
       title = "Number of Students Below a 11 Year Old Reading Age")

Percentage

total_NGRT %>%
  group_by(below_11_years_old, Year) %>% 
  summarise(total = n()) %>% 
  ungroup() %>%
  group_by(Year) %>% 
  mutate(percent = total/sum(total)*100) %>% 
  ggplot(aes(x = Year, y = percent, fill = fct_rev(below_11_years_old)))+
  geom_bar(stat = "identity", position = "stack", color = "black")+
  scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
  theme_bw()+
  labs(fill = "Below 11 \nYears Old",
       title = "Percentage of Students Below 11 Years Old Reading Age")

Inspecting via Gender and Year

Absolute Value

total_NGRT %>%
  group_by(below_11_years_old, Year, Gender) %>% 
  summarise(total = n()) %>% 
  ungroup() %>%
  mutate(Gender = as.factor(Gender)) %>% 
  ggplot(aes(x = Year, y = total, fill = fct_rev(below_11_years_old)))+
  geom_bar(stat = "identity", position = "dodge", color = "black")+
  scale_y_continuous(name="Frequency", limits=c(0, 60), breaks = seq(0,60,10))+
  facet_wrap(~Gender)+
  theme_bw()+
  labs(fill = "Below 11 \nYears Old",
       title = "Number of Students Below 11 Years Old Reading Age")

Percentage

total_NGRT %>%
  group_by(below_11_years_old, Year, Gender) %>% 
  summarise(total = n()) %>% 
  ungroup() %>%
  group_by(Year,Gender) %>% 
  mutate(percent = total/sum(total)*100) %>%
  ungroup() %>%
  mutate(Gender = as.factor(Gender)) %>%
  ggplot(aes(x = Year, y = percent, fill = fct_rev(below_11_years_old)))+
  geom_bar(stat = "identity", color = "black")+
  scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
  facet_wrap(~Gender)+
  theme_bw()+
  labs(fill = "Below 11 \nYears Old",
       title = "Percentage of Students Below 11 Years Old Reading Age")

Enriching the NGRT Data

To enrich the NRGT data, I incorporated the Dynamic Student List Report from OneSchool to identify demographic information for each observation of the reading dataset. This was done merging each dataset together using EQ_ID. The information I was interested in was “origin school” and if any trends were present when students were grouped by their primary school.

Not all students that took the NGRT test in previous years, 2018 - 2020, were enrolled at Tully SHS during the generation of this report. Despite this, students with a “Left” enrolment status were still used in the following graphics. I also observed an error in the Dynamic Student List data, in that some students had “Tully State High School” as their origin school. Only 5 students were observed with this error and these students were placed into “Tully State School” category after each student was manually inspected using OneSchool.

Inspection by Origin School

Absolute Value

total_NGRT_school_data %>% 
  filter(is.na(Student_Name) == F) %>% 
  group_by(Origin_School, below_11_years_old) %>% 
  summarise(value = n()) %>%
  ungroup() %>%
  mutate(Origin_School = ifelse(value<4, "Small Schools (< 4 Students)",Origin_School)) %>%
  group_by(Origin_School, below_11_years_old) %>% 
  summarise(total_school_amount = sum(value)) %>% 
  ungroup() %>% 
  ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = total_school_amount, fill = fct_rev(below_11_years_old)))+ 
  geom_bar(stat = "identity", color = "black")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  scale_y_continuous(name="Frequency", limits=c(0, 200), breaks = seq(0,200,20))+
  labs(fill = "Below Reading\nAge",
       x = "Primary School",
       title = "Number of Students Below a 11 Year Old Reading Age")

Percentage

total_NGRT_school_data %>% 
  filter(is.na(Student_Name) == F) %>% 
  group_by(Origin_School, below_11_years_old) %>% 
  summarise(value = n()) %>%
  ungroup() %>%
  mutate(Origin_School = ifelse(value<4, "Small Schools (< 4 Students)",Origin_School)) %>%
  group_by(Origin_School, below_11_years_old) %>% 
  summarise(total_school_amount = sum(value)) %>%
  ungroup() %>%
  group_by(Origin_School) %>% 
  mutate(percent = round(total_school_amount/sum(total_school_amount)*100,2)) %>%
  ungroup() %>%
  ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = percent, fill = fct_rev(below_11_years_old)))+ 
  geom_bar(stat = "identity", color = "black")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
  labs(fill = "Below Reading\nAge",
       x = "Primary School",
       title = "Percentage Distribution of Students Below a 11 Year Old Reading Age")

Inspection of Student Reading Level by Origin School over the Last 4 Years

Absolute Value

total_NGRT_school_data %>% 
  filter(is.na(Student_Name) == F) %>% 
  group_by(Origin_School, below_11_years_old, Year.x) %>% 
  summarise(value = n()) %>%
  ungroup() %>% 
  mutate(Origin_School = ifelse(value<2, "Small Schools (< 2 Students)",Origin_School)) %>%
  group_by(Origin_School, below_11_years_old, Year.x) %>% 
  summarise(total_school_amount = sum(value)) %>% 
  ungroup() %>%
  ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = total_school_amount, fill = fct_rev(below_11_years_old)))+ 
  geom_bar(stat = "identity", color = "black")+
  facet_grid(Year.x~., scales = "free")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1), legend.position = "none")+
  scale_y_continuous(name="Frequency", limits=c(0, 60), breaks = seq(0,60,10))+
  labs(x = "Primary School",
       title = "Number of Students Below a 11 Year Old Reading Age")

Percent

total_NGRT_school_data %>% 
  filter(is.na(Student_Name) == F) %>% 
  group_by(Origin_School, below_11_years_old, Year.x) %>% 
  summarise(value = n()) %>%
  ungroup() %>% 
  mutate(Origin_School = ifelse(value<2, "Small Schools (< 2 Students)",Origin_School)) %>%
  group_by(Origin_School, below_11_years_old, Year.x) %>% 
  summarise(total_school_amount = sum(value)) %>% 
  ungroup() %>%
  group_by(Origin_School, Year.x) %>% 
  mutate(percent = round(total_school_amount/sum(total_school_amount)*100,2))%>%
  ggplot(aes(x = fct_reorder(Origin_School, -total_school_amount), y = percent, fill = fct_rev(below_11_years_old)))+ 
  geom_bar(stat = "identity", color = "black")+
  facet_grid(Year.x~., scales = "free")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1), legend.position = "none")+
  scale_y_continuous(name="Percent (%)", limits=c(0, 100), breaks = seq(0,100,10))+
  labs(x = "Primary School",
       title = "Percentage of Students Below 11 Year Old Reading Age",
       subtitle = "Last 4 Years")

Inspecting Overall Reading Age

The NGRT dataset provided to the school using a 95% confidence interval. This means that if the test was repeated 100 times, student reading agaes would flucuate between the upper and lower CI interval 95 times (95% accuracy rating). For this reason, I only used the Overall Reading Age was used to inspect performance of the students over the last 4 years. However, the dataset that was provided by the school was not in an ideal formate and utilised a 12 point scale between each year from 10:00 to 10:11.

To inspect distribution of the reading age by a variety of factors, I needed to transform this categorical scale to a numeric scale. To achieve this, I separated each reading age into years and months using the “:”, added 1 to each of the months to get rid of the zero values, remove the plus signs (17:00+) from high reading ages, divded months by 13 to get a decimal value and added that decimal value to the years column. The reason, I did not divide by 13 was becuase when I added 1 to to a value of 11 and then divided by 12 it returned a value of 1 thus, resulting in an incorrect reading age (increasing reading age by 1 and not 0.92). Also all student above 17 years old were grouped together because the upper limiter was not specified.

all_metrics <- rbind(metrics_2018, metrics_2019, metrics_2020, metrics_2021)

rownames(all_metrics) <- c("2018","2019","2020","2021")

all_metrics$Max <- "17:00+"

all_metrics<- as.data.frame(t(all_metrics))

knitr::kable(all_metrics, align = "lccrr", digits = 2)
2018 2019 2020 2021
Max 17:00+ 17:00+ 17:00+ 17:00+
Q3 14.308 14.077 14.308 13.692
Median 12.154 12.923 12.154 11.615
Q1 10.30800 10.92300 9.25025 9.42350
Min 5.308 5.923 5.154 5.077
total_NGRT_school_data %>% 
  ggplot(aes(x = Year.x, y = Years_as_numeric, fill = Year.x))+
  geom_boxplot()+
  scale_y_continuous(name = "Reading Age", limits = c(5.0,17.5), breaks = seq(5.0,17.5,0.5))+
  theme_bw()

Distribution By Year

2018

hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2018")], xaxt = "n", main = "Distribution of Reading Age for 2018", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

2019

hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2019")], xaxt = "n", main = "Distribution of Reading Age for 2019", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

2020

hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2020")], xaxt = "n", main = "Distribution of Reading Age for 2020", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

2021

hist(total_NGRT_school_data$Years_as_numeric[which(total_NGRT_school_data$Year.x == "2021")], xaxt = "n", main = "Distribution of Reading Age for 2021", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

Past 4 Years Combined

hist(total_NGRT_school_data$Years_as_numeric, xaxt = "n", main = "Distribution of Reading Age", xlab = "Reading Age")
axis(side = 1, at = seq(4,18,1))

Distribution of Reading Age For Each School

Year

NGRT_year <- total_NGRT_school_data %>%
  filter(is.na(Origin_School) == F) %>%
  group_by(Origin_School) %>%
  select(Origin_School, Years_as_numeric, Year.x) %>%
  mutate(school_count = n(),
         Origin_School = ifelse(school_count <= 5, "Small School (<=5)", Origin_School)) %>%
  ungroup()

Years_dist<- NGRT_year %>% ggplot(aes(Years_as_numeric, fill =Year.x))+
  facet_wrap(~Origin_School, scales = "free")+
  geom_density(alpha = .5,color = "black")+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  theme_bw()

ggplotly(Years_dist)

Gender

total_NGRT_school_data %>%
  filter(is.na(Origin_School) == F) %>%
  group_by(Origin_School) %>%
  select(Origin_School, Years_as_numeric, Gender) %>%
  mutate(school_count = n(),
         Origin_School = ifelse(school_count <= 5, "Small School (<=5)", Origin_School)) %>%
  ungroup() %>% 
  ggplot(aes(Years_as_numeric, color = fct_rev(Gender)))+
  facet_wrap(~Origin_School, scales = "free")+
  geom_density()+
  theme_bw()+
  labs(title = "Distribution of Gender per Primary School",
  fill = "Gender")

Indigenous Status

total_NGRT_school_data %>%
  filter(is.na(Origin_School) == F) %>%
  group_by(Origin_School) %>%
  select(Origin_School, Years_as_numeric, Indigenous) %>%
  mutate(school_count = n(),
         Origin_School = ifelse(school_count <= 5, "Small School (<=5)", Origin_School)) %>%
  ungroup() %>% 
  ggplot(aes(Years_as_numeric, color = fct_rev(Indigenous)))+
  facet_wrap(~Origin_School, scales = "free")+
  geom_density()+
  theme_bw()+
  labs(title = "Distribution of Indigenous Status per Primary School",
  fill = "Indigenous")

Grouping Reading Age into Categories

All

library(knitr)

all_data <- total_NGRT_school_data %>% 
  select(Year.x,Years_as_numeric) %>%
  mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
         btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
         btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
         btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
         btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
         Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
  group_by(Year.x) %>% 
  summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 7 and < 9" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 9 and < 11" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 11 and < 13" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 13 and < 15" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)

show_all_data <- all_data

colnames(show_all_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")

kable(show_all_data, align = "c", digits = 2)
Year \(<7\) \(\geqslant7\) and \(<9\) \(\geqslant9\) and \(<11\) \(\geqslant11\) and \(<13\) \(\geqslant13\) and \(<15\) \(\geqslant15\)
2018 2.01 12.08 19.46 28.19 20.13 18.12
2019 1.96 9.80 15.69 23.53 33.33 15.69
2020 6.25 15.97 15.97 22.92 18.06 20.83
2021 3.75 17.50 19.38 28.12 16.88 14.37
library(reshape2)

melt(all_data, id = "Year.x") %>% 
  ggplot(aes(x = variable, y = value))+
  geom_bar(stat = "identity", color = "black", fill = "steelblue")+
  facet_grid(~Year.x)+
  scale_y_continuous(name="Percentage (%)", limits=c(0, 38), breaks = seq(0,35,5))+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  labs(title = "Inspecting Reading Age Distribution",
       subtitle = "Over the last 4 years")

Male

Male_data <- total_NGRT_school_data %>% 
  select(Gender,Year.x,Years_as_numeric) %>%
  filter(Gender == "Male") %>% 
  mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
         btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
         btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
         btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
         btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
         Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
  group_by(Year.x) %>% 
  summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 7 and < 9" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 9 and < 11" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 11 and < 13" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 13 and < 15" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)

show_male_data <- Male_data

colnames(show_male_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")

kable(show_male_data, align = "c", digits = 2)
Year \(<7\) \(\geqslant7\) and \(<9\) \(\geqslant9\) and \(<11\) \(\geqslant11\) and \(<13\) \(\geqslant13\) and \(<15\) \(\geqslant15\)
2018 1.32 15.79 17.11 30.26 19.74 15.79
2019 3.77 11.32 15.09 22.64 35.85 11.32
2020 7.58 19.70 15.15 21.21 18.18 18.18
2021 7.50 21.25 22.50 20.00 13.75 15.00
melt(Male_data, id = "Year.x") %>% 
  ggplot(aes(x = variable, y = value))+
  geom_bar(stat = "identity", color = "black", fill = "steelblue")+
  facet_grid(~Year.x)+
  scale_y_continuous(name="Percentage (%)", limits=c(0, 38), breaks = seq(0,35,5))+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  labs(title = "Inspecting Male Reading Age Distribution",
       subtitle = "Over the last 4 years")

Female

female_data <- total_NGRT_school_data %>% 
  select(Gender,Year.x,Years_as_numeric) %>%
  filter(Gender == "Female") %>% 
  mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
         btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
         btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
         btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
         btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
         Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
  group_by(Year.x) %>% 
  summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 7 and < 9" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 9 and < 11" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 11 and < 13" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 13 and < 15" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)

show_female_data <- female_data

colnames(show_female_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")

kable(show_female_data, align = "c", digits = 2)
Year \(<7\) \(\geqslant7\) and \(<9\) \(\geqslant9\) and \(<11\) \(\geqslant11\) and \(<13\) \(\geqslant13\) and \(<15\) \(\geqslant15\)
2018 2.74 8.22 21.92 26.03 20.55 20.55
2019 0.00 8.16 16.33 24.49 30.61 20.41
2020 5.13 12.82 16.67 24.36 17.95 23.08
2021 0.00 13.75 16.25 36.25 20.00 13.75
melt(female_data, id = "Year.x") %>% 
  ggplot(aes(x = variable, y = value))+
  geom_bar(stat = "identity", color = "black", fill = "pink")+
  scale_y_continuous(name="Percentage (%)", limits=c(0, 38), breaks = seq(0,35,5))+
  facet_grid(~Year.x)+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  labs(title = "Inspecting Female Reading Age Distribution",
       subtitle = "Over the last 4 years")

Indigenous Status

indigenous_data <- total_NGRT_school_data %>% 
  select(Indigenous,Year.x,Years_as_numeric) %>%
  filter(Indigenous == "Y") %>% 
  mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
         btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
         btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
         btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
         btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
         Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
  group_by(Year.x) %>% 
  summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 7 and < 9" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 9 and < 11" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 11 and < 13" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 13 and < 15" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)

show_indig_data <- indigenous_data

colnames(show_indig_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")

kable(show_indig_data, align = "c", digits = 2)
Year \(<7\) \(\geqslant7\) and \(<9\) \(\geqslant9\) and \(<11\) \(\geqslant11\) and \(<13\) \(\geqslant13\) and \(<15\) \(\geqslant15\)
2018 6.67 16.67 40.00 16.67 16.67 3.33
2019 5.00 20.00 20.00 20.00 30.00 5.00
2020 10.34 41.38 13.79 20.69 6.90 6.90
2021 3.03 33.33 33.33 24.24 6.06 0.00
melt(indigenous_data, id = "Year.x") %>% 
  ggplot(aes(x = variable, y = value))+
  geom_bar(stat = "identity", color = "black", fill = "darkgreen")+
  facet_grid(~Year.x)+
  scale_y_continuous(name="Percentage (%)", limits=c(0, 42), breaks = seq(0,40,5))+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  labs(title = "Inspecting Reading Age Distribution in Indigenous Students",
       subtitle = "Over the last 4 years")

Non-Indigenous Status

nonindigenous_data <- total_NGRT_school_data %>% 
  select(Indigenous,Year.x,Years_as_numeric) %>%
  filter(Indigenous == "N") %>% 
  mutate(Over_15 = ifelse(Years_as_numeric>=15,1,0),
         btw_15_and_13 = ifelse(Years_as_numeric<15 & Years_as_numeric>=13,1,0),
         btw_13_and_11 = ifelse(Years_as_numeric<13 & Years_as_numeric>=11,1,0),
         btw_11_and_9 = ifelse(Years_as_numeric<11 & Years_as_numeric>=9,1,0),
         btw_9_and_7 = ifelse(Years_as_numeric<9 & Years_as_numeric>=7,1,0),
         Under_7 = ifelse(Years_as_numeric<7,1,0)) %>%
  group_by(Year.x) %>% 
  summarise("< 7" = sum(Under_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 7 and < 9" = sum(btw_9_and_7)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 9 and < 11" = sum(btw_11_and_9)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 11 and < 13" = sum(btw_13_and_11)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 13 and < 15" = sum(btw_15_and_13)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100,
            ">= 15" = sum(Over_15)/(sum(Under_7)+sum(btw_9_and_7)+sum(btw_11_and_9)+sum(btw_13_and_11)+sum(btw_15_and_13)+sum(Over_15))*100)

show_nonindig_data <- nonindigenous_data

colnames(show_nonindig_data) <- c("Year", "$<7$", "$\\geqslant7$ and $<9$","$\\geqslant9$ and $<11$ ","$\\geqslant11$ and $<13$", "$\\geqslant13$ and $<15$","$\\geqslant15$")

kable(show_nonindig_data, align = "c", digits = 2)
Year \(<7\) \(\geqslant7\) and \(<9\) \(\geqslant9\) and \(<11\) \(\geqslant11\) and \(<13\) \(\geqslant13\) and \(<15\) \(\geqslant15\)
2018 0.86 10.34 14.66 31.03 21.55 21.55
2019 1.25 7.50 15.00 22.50 35.00 18.75
2020 5.22 9.57 16.52 23.48 20.87 24.35
2021 3.94 13.39 15.75 29.13 19.69 18.11
melt(nonindigenous_data, id = "Year.x") %>% 
  ggplot(aes(x = variable, y = value))+
  geom_bar(stat = "identity", color = "black", fill = "orange")+
  facet_grid(~Year.x)+
  scale_y_continuous(name="Percentage (%)", limits=c(0, 42), breaks = seq(0,40,5))+
  theme_bw()+
  theme(axis.text.x = element_text(angle=60, hjust=1))+
  labs(title = "Inspecting Reading Age Distribution in Non-Indigenous Students",
       subtitle = "Over the last 4 years")